> # DHS Calendar Tutorial - Example 1
> # Basic string manipulation
> 
> # download the model dataset for individual women's recode: "ZZIR62FL.DTA" 
> # the model datasets are available at http://dhsprogram.com/data/download-model-datasets.cfm
> 
> # install the foreign package to be able to read the data
> #install.packages("foreign")
> # load the library foreign
> library(foreign)
> 
> # read the Stata dataset into R; convert.factors = FALSE asks read.dta() not to turn value-labelled variables into factors
> dta <- read.dta("C:/Data/DHS_model/ZZIR62FL.dta", convert.factors = FALSE)
> 
> # attach the data frame so its columns (e.g. vcal_1, v019) can be referred to by bare name in the steps below
> attach(dta)  # NOTE(review): attach() is easy to get wrong when names clash; dta$vcal_1 or with(dta, ...) is the more explicit form
> 
> 
> # 1) display column 1 of the calendar (vcal_1) for the first 6 respondents; note the leading blanks in each string
> head(vcal_1)  # head() prints the first 6 elements by default
[1] "              00000BPPPPPPPP00000000000000000000000BPPPPPPPP00000000000000000000"
[2] "              PPPPPP000000000000000000000000BPPPPPPPP000000000000000000000000000"
[3] "              000000000000000000000000000000000000000000000000000000000000000000"
[4] "              0000000000BPPPPPPPP00000000000BPPPPPPPP000000000000000000000000000"
[5] "              0BPPPPPPPP000000000000000000000000BPPPPPPPP00000000000000000000000"
[6] "              000000000000000000000000000000000000000000000000000000000000000BPP"
> 
> 
> # 2) calculate the full length of the calendar by taking the number of characters in each string (leading blanks included)
> vcal_len <- nchar(vcal_1)
> head(vcal_len)
[1] 80 80 80 80 80 80
> 
> 
> # 3) take a piece of each string from column 1: characters 44 through 55 inclusive (12 characters)
> piece <- substr(vcal_1,44,55)
> head(piece)
[1] "00000000BPPP" "0BPPPPPPPP00" "000000000000" "0BPPPPPPPP00" "00000BPPPPPP" "000000000000"
> 
> 
> # 4) find the position of the first occurrence of a substring (here "P") within each string
> pos <- regexpr("P",vcal_1,fixed=TRUE)  # fixed=TRUE: treat "P" as a literal string, not a regular expression
> head(pos)  # -1 marks a string that contains no "P" at all
[1] 21 15 -1 26 17 79
> 
> 
> # 5) reverse a string
> # strReverse: reverse the order of the characters in every element of x.
> #   x       character vector (strsplit() requires character input)
> #   returns character vector of the same length as x, each element reversed
> # vapply() is used instead of sapply() so the result is always a character
> # vector, even for zero-length x (where sapply() would return an empty list).
> strReverse <- function(x)
+   vapply(lapply(strsplit(x, NULL), rev), paste, character(1), collapse = "")
> rev_cal <- strReverse(vcal_1)  # reversed copy of every respondent's calendar string
> head(rev_cal)  # the blanks that led each string in vcal_1 now trail it
[1] "00000000000000000000PPPPPPPPB00000000000000000000000PPPPPPPPB00000              "
[2] "000000000000000000000000000PPPPPPPPB000000000000000000000000PPPPPP              "
[3] "000000000000000000000000000000000000000000000000000000000000000000              "
[4] "000000000000000000000000000PPPPPPPPB00000000000PPPPPPPPB0000000000              "
[5] "00000000000000000000000PPPPPPPPB000000000000000000000000PPPPPPPPB0              "
[6] "PPB000000000000000000000000000000000000000000000000000000000000000              "
> 
> 
> # 6) trim a string of leading and trailing spaces
> # trim: strip whitespace from the start and the end of each element of x;
> # whitespace in the interior of a string is left alone.
> trim <- function (x) {
+   edge_blanks <- "^\\s+|\\s+$"
+   gsub(edge_blanks, "", x)
+ }
> trim_cal <- trim(vcal_1)  # calendar strings with the leading/trailing blanks removed
> head(trim_cal)
[1] "00000BPPPPPPPP00000000000000000000000BPPPPPPPP00000000000000000000"
[2] "PPPPPP000000000000000000000000BPPPPPPPP000000000000000000000000000"
[3] "000000000000000000000000000000000000000000000000000000000000000000"
[4] "0000000000BPPPPPPPP00000000000BPPPPPPPP000000000000000000000000000"
[5] "0BPPPPPPPP000000000000000000000000BPPPPPPPP00000000000000000000000"
[6] "000000000000000000000000000000000000000000000000000000000000000BPP"
> 
> 
> # 7) display the length of calendar actually used, i.e. the number of characters left after trimming the blanks
> vcal_used <- nchar(trim_cal)
> # should be the same as v019 (compare the two head() outputs below)
> head(vcal_used)
[1] 66 66 66 66 66 66
> head(v019)  # v019 comes from the attached data; printed to compare against vcal_used above
[1] 66 66 66 66 66 66